library(dplyr)
library(ggplot2)
library(effsize)
library(forcats)
# Set working directory if needed on your local machine
setwd("/Users/anirban/MyLab/Manuscript/Mofida_Polk-aging/Mar26_VOR/Plot_source.data")
# Read data
input_file <- "Fig5C+Sup1_data.csv"
data2 <- read.csv(input_file, header = TRUE)
library(dplyr)
library(ggplot2)
library(effsize)
library(forcats)
# Set working directory if needed on your local machine
setwd("//Users/anirban/MyLab/Manuscript/Mofida_Polk-aging/Mar26_VOR/Plot_source.data/Figure5")
# Read data
input_file <- "Fig5C+Sup1_data.csv"
data2 <- read.csv(input_file, header = TRUE)
# Rearrange factor levels
data2$brain_area <- factor(data2$brain_area, levels = c("Cg1", "M1", "M2", "S1"))
data2$celltype <- factor(data2$celltype, levels = c("IN", "PN"))
data2$Age_Grp <- factor(data2$Age_Grp, levels = c("middle", "earlyOld", "lateOld"))
data2$MGAdjacent_count <- factor(data2$MGAdjacent_count, levels = c("MG free", "MG tied"))
########################
# Helper functions
########################
format_p_value <- function(p) {
if (is.na(p)) {
return("p = NA")
} else if (p < 1e-4) {
return(paste0("p = ", formatC(p, format = "e", digits = 3)))
} else {
return(paste0("p = ", formatC(p, format = "f", digits = 4)))
}
}
calc_stats_table <- function(df, y_col, y_top) {
df %>%
group_by(Age_Grp, celltype) %>%
group_modify(~{
g_free <- .x %>%
filter(MGAdjacent_count == "MG free") %>%
pull(all_of(y_col)) %>%
.[!is.na(.)]
g_tied <- .x %>%
filter(MGAdjacent_count == "MG tied") %>%
pull(all_of(y_col)) %>%
.[!is.na(.)]
if (length(g_free) < 2 || length(g_tied) < 2) {
return(data.frame(
p_value = NA_real_,
d_value = NA_real_,
label = "p = NA\nd = NA",
label_y = y_top * 0.98
))
}
# Mann-Whitney U test (Wilcoxon rank-sum test in R)
wt <- wilcox.test(g_free, g_tied, exact = FALSE)
# add Cohen's d in the plot layout
d_obj <- cohen.d(g_tied, g_free, hedges.correction = FALSE)
d_val <- unname(d_obj$estimate)
data.frame(
p_value = wt$p.value,
d_value = d_val,
label = paste0(
format_p_value(wt$p.value),
"\n",
"d = ", sprintf("%.2f", d_val)
),
label_y = y_top * 0.98
)
}) %>%
ungroup()
}
make_n_table <- function(df, y_col, y_top) {
df %>%
group_by(Age_Grp, celltype, MGAdjacent_count) %>%
summarise(
n = sum(!is.na(.data[[y_col]])),
.groups = "drop"
) %>%
mutate(
label = paste0("n = ", n),
label_y = y_top * -0.06,
celltype_num = as.numeric(factor(celltype, levels = c("IN", "PN"))),
x_pos = case_when(
MGAdjacent_count == "MG free" ~ celltype_num - 0.22,
MGAdjacent_count == "MG tied" ~ celltype_num + 0.22,
TRUE ~ celltype_num
)
)
}
make_boxplot <- function(df, y_col, y_label, y_top) {
stats_df <- calc_stats_table(df, y_col, y_top)
n_df <- make_n_table(df, y_col, y_top)
dodge <- position_dodge(width = 0.75)
ggplot(df, aes(x = celltype, y = .data[[y_col]], fill = MGAdjacent_count)) +
geom_boxplot(
color = "black",
notch = FALSE,
alpha = 0.25,
width = 0.6,
outlier.alpha = 0.25,
position = dodge
) +
geom_text(
data = stats_df,
aes(x = celltype, y = label_y, label = label),
inherit.aes = FALSE,
size = 4.5,
lineheight = 0.9,
vjust = 1
) +
geom_text(
data = n_df,
aes(x = x_pos, y = label_y, label = label),
inherit.aes = FALSE,
size = 3.7,
vjust = 1
) +
facet_grid(Age_Grp ~ .) +
scale_fill_manual(values = c("MG free" = "cyan", "MG tied" = "purple")) +
coord_cartesian(ylim = c(-0.10 * y_top, y_top), clip = "off") +
labs(
x = "celltype",
y = y_label,
fill = "MGAdjacent_count"
) +
theme_minimal() +
theme(
axis.title = element_text(size = 16),
axis.text = element_text(size = 16),
axis.text.x = element_text(margin = margin(t = 8)),
strip.text = element_text(size = 18),
legend.position = "right",
panel.grid.minor = element_blank(),
plot.title = element_text(size = 18, hjust = 0.5),
plot.margin = margin(15, 15, 25, 15)
)
}
########################
# CytoPolk median intensity
########################
graph1 <- make_boxplot(
df = data2,
y_col = "CytoPolk_median.intensity",
y_label = "Cytoplasmic POLK median intensity (A.U.)",
y_top = 1.0
)
print(graph1)
########################
# Nuc.Polk median intensity
########################
graph2 <- make_boxplot(
df = data2,
y_col = "Nuc.Polk_median.intensity",
y_label = "Nuclear POLK median intensity (A.U.)",
y_top = 1.0
)
print(graph2)
########################
# CytoPolk per area
########################
graph3 <- make_boxplot(
df = data2,
y_col = "CytoPolkperArea",
y_label = "Cytoplasmic POLK counts",
y_top = 1000
)
print(graph3)
########################
# Optional: save plots
########################
# ggsave("graph1_with_MWU_p_d_n.pdf", graph1, width = 7, height = 11)
# ggsave("graph2_with_MWU_p_d_n.pdf", graph2, width = 7, height = 11)
# ggsave("graph3_with_MWU_p_d_n.pdf", graph3, width = 7, height = 11)
library(dplyr)
library(ggplot2)
library(effsize)
library(forcats)
# Set working directory if needed on your local machine
setwd("/Users/anirban/MyLab/Manuscript/Mofida_Polk-aging/Mar26_VOR/Plot_source.data/Figure5")
# Read data
input_file <- "Fig5C+Sup1_data.csv"
data <- read.csv(input_file, header = TRUE)
# Rearrange factor levels
data$brain_area <- factor(data$brain_area, levels = c("Cg1", "M1", "M2", "S1"))
data$celltype <- factor(data$celltype, levels = c("IN", "PN"))
data$Age_Grp <- factor(data$Age_Grp, levels = c("middle", "earlyOld", "lateOld"))
data$MGAdjacent_count <- factor(data$MGAdjacent_count, levels = c("MG free", "MG tied"))
########################
# Helper functions
########################
format_p_value <- function(p) {
if (is.na(p)) {
return("p = NA")
} else if (p < 1e-4) {
return(paste0("p = ", formatC(p, format = "e", digits = 3)))
} else {
return(paste0("p = ", formatC(p, format = "f", digits = 4)))
}
}
calc_stats_table <- function(df, y_col, y_top) {
df %>%
group_by(Age_Grp, celltype) %>%
group_modify(~{
g_free <- .x %>%
filter(MGAdjacent_count == "MG free") %>%
pull(all_of(y_col)) %>%
.[!is.na(.)]
g_tied <- .x %>%
filter(MGAdjacent_count == "MG tied") %>%
pull(all_of(y_col)) %>%
.[!is.na(.)]
if (length(g_free) < 2 || length(g_tied) < 2) {
return(data.frame(
p_value = NA_real_,
d_value = NA_real_,
label = "p = NA\nd = NA",
label_y = y_top * 0.98
))
}
# Mann-Whitney U test (Wilcoxon rank-sum test in R)
wt <- wilcox.test(g_free, g_tied, exact = FALSE)
# add Cohen's d in the plot layout
d_obj <- cohen.d(g_tied, g_free, hedges.correction = FALSE)
d_val <- unname(d_obj$estimate)
data.frame(
p_value = wt$p.value,
d_value = d_val,
label = paste0(
format_p_value(wt$p.value),
"\n",
"d = ", sprintf("%.2f", d_val)
),
label_y = y_top * 0.98
)
}) %>%
ungroup()
}
make_n_table <- function(df, y_col, y_top) {
df %>%
group_by(Age_Grp, celltype, MGAdjacent_count) %>%
summarise(
n = sum(!is.na(.data[[y_col]])),
.groups = "drop"
) %>%
mutate(
label = paste0("n = ", n),
label_y = y_top * -0.06,
celltype_num = as.numeric(factor(celltype, levels = c("IN", "PN"))),
x_pos = case_when(
MGAdjacent_count == "MG free" ~ celltype_num - 0.22,
MGAdjacent_count == "MG tied" ~ celltype_num + 0.22,
TRUE ~ celltype_num
)
)
}
make_boxplot <- function(df, y_col, y_label, y_top) {
stats_df <- calc_stats_table(df, y_col, y_top)
n_df <- make_n_table(df, y_col, y_top)
dodge <- position_dodge(width = 0.75)
ggplot(df, aes(x = celltype, y = .data[[y_col]], fill = MGAdjacent_count)) +
geom_boxplot(
color = "black",
notch = FALSE,
alpha = 0.25,
width = 0.6,
outlier.alpha = 0.25,
position = dodge
) +
geom_text(
data = stats_df,
aes(x = celltype, y = label_y, label = label),
inherit.aes = FALSE,
size = 4.5,
lineheight = 0.9,
vjust = 1
) +
geom_text(
data = n_df,
aes(x = x_pos, y = label_y, label = label),
inherit.aes = FALSE,
size = 3.7,
vjust = 1
) +
facet_grid(Age_Grp ~ .) +
scale_fill_manual(values = c("MG free" = "cyan", "MG tied" = "purple")) +
coord_cartesian(ylim = c(-0.10 * y_top, y_top), clip = "off") +
labs(
x = "celltype",
y = y_label,
fill = "MGAdjacent_count"
) +
theme_minimal() +
theme(
axis.title = element_text(size = 16),
axis.text = element_text(size = 16),
axis.text.x = element_text(margin = margin(t = 8)),
strip.text = element_text(size = 18),
legend.position = "right",
panel.grid.minor = element_blank(),
plot.title = element_text(size = 18, hjust = 0.5),
plot.margin = margin(15, 15, 25, 15)
)
}
########################
# CytoPolk median intensity
########################
graph1 <- make_boxplot(
df = data,
y_col = "CytoPolk_median.intensity",
y_label = "Cytoplasmic POLK median intensity (A.U.)",
y_top = 1.0
)
print(graph1)
########################
# Nuc.Polk median intensity
########################
graph2 <- make_boxplot(
df = data,
y_col = "Nuc.Polk_median.intensity",
y_label = "Nuclear POLK median intensity (A.U.)",
y_top = 1.0
)
print(graph2)
########################
# CytoPolk per area
########################
graph3 <- make_boxplot(
df = data,
y_col = "CytoPolkperArea",
y_label = "Cytoplasmic POLK counts",
y_top = 1000
)
print(graph3)
########################
# Optional: save plots
########################
# ggsave("graph1_with_MWU_p_d_n.pdf", graph1, width = 7, height = 11)
# ggsave("graph2_with_MWU_p_d_n.pdf", graph2, width = 7, height = 11)
# ggsave("graph3_with_MWU_p_d_n.pdf", graph3, width = 7, height = 11)
library(dplyr)
library(ggplot2)
library(ggplotgui)
library(esquisse)
library(effsize)
library(forcats)
library(ggpubr)
# Set this to the folder containing Figure1F_data.csv
setwd("/Users/anirban/MyLab/Manuscript/Mofida_Polk-aging/Mar26_VOR/Plot_source.data/Figure1")
# Read data IN and PN
data1 <- read.table("./Figure1F_data.csv", header = TRUE, sep = ",")
# Rearrange factor levels
data1$area <- factor(data1$area, levels=c("Cg1","M1", "M2", "S1"))
data1$age <- factor(data1$age, levels = c("1M", "10M", "18M"))
# Define comparisons
comparisons <- list(
c("1M", "10M"),
c("10M", "18M"),
c("1M", "18M")
)
# boxPlot NucPolk per area
graph1 <- ggplot(data1, aes(x = age, y = nucPolkPerArea, fill = age)) +
geom_boxplot(notch = TRUE, alpha = 0.3) +
scale_y_continuous(breaks = seq(0, 100, by = 20)) + #set ticks every 20, between 0-->100
stat_compare_means(comparisons = comparisons, method = "wilcox.test") +  # Mann–Whitney U test (implemented in R as wilcox.test)
facet_grid( area ~ . )+
labs(colour = 'title legend')+
scale_colour_brewer(palette = 'Dark2') +
theme_classic() +
theme(
legend.position = 'bottom'
)
graph1
# boxPlot CytoPolk per area
graph2 <- ggplot(data1, aes(x = age, y = cytoPolkPerArea, fill = age)) +
geom_boxplot(notch = TRUE, alpha = 0.3) +
scale_y_continuous(breaks = seq(0, 100, by = 20)) + #set ticks every 20, between 0-->100
stat_compare_means(comparisons = comparisons, method = "wilcox.test") +  # Mann–Whitney U test (implemented in R as wilcox.test)
facet_grid( area ~ . )+
labs(colour = 'title legend')+
scale_colour_brewer(palette = 'Dark2') +
theme_classic() +
theme(
legend.position = 'bottom'
)
graph2
